# Annotate the raw peptide table:
#   * run   - run name looked up from the position of each row's `file` in
#             `files` (1st file -> "ctrl", then "a" .. "e"); files not found
#             in `files` get NA.
#   * pepID - a letter (a, b, c, ...) distinguishing repeated occurrences of
#             the same annotated sequence within one file. `letters` has 26
#             entries, so a 27th duplicate would receive NA.
# The new columns are then moved to the front of the table.
pepRaw <- pepRaw %>%
  # Use the piped `file` column rather than reaching back to the outer
  # `pepRaw` object, so the lookup always lines up with the rows in the pipe.
  mutate(run = c("ctrl", "a", "b", "c", "d", "e")[match(file, files)]) %>%
  group_by(file, `Annotated Sequence`) %>%
  mutate(pepID = letters[seq_len(n())]) %>%
  ungroup() %>%
  select(file, run, pepID, everything())
# Reshape the channel columns 126..131 into long form (one row per original
# row and channel), derive the condition of every channel, and build the
# `peptide` and `cell` identifiers.
pepTidy <- pepRaw %>%
  gather(key = label, value = intensity, "126":"131") %>%
  select(run, label, intensity, everything(), -file) %>%
  mutate(
    # All channels of the "ctrl" run are controls; in every other run the
    # channels 126..131 map to ctrl, 0, 20, 40, 60, 80 in that order.
    condition = ifelse(
      run == "ctrl",
      "ctrl",
      c("ctrl", 0, 20, 40, 60, 80)[match(label, 126:131)]
    ),
    intensity = as.numeric(intensity)
  ) %>%
  # peptide = annotated sequence joined with its duplicate letter.
  unite(peptide, c(`Annotated Sequence`, "pepID")) %>%
  rename(Uniprot = `Master Protein Accessions`) %>%
  # cell = run, label and condition joined; the source columns are kept.
  unite(cell, c("run", "label", "condition"), remove = FALSE) %>%
  select(cell, run, label, condition, peptide, Uniprot, intensity)

# Density of the log2 intensities, one curve per cell.
ggplot(pepTidy, aes(x = log2(intensity), colour = cell)) +
  geom_density() +
  theme_bw()
The peptide intensities appear to follow a log-normal distribution.
# Wide table of the control measurements: one row per peptide, one column
# per control cell.
pairs <- pepTidy %>%
  filter(condition == "ctrl") %>%
  select(cell, peptide, intensity) %>%
  spread(key = cell, value = intensity)

# Pairwise scatter matrix of the log2 intensities; the first column
# (peptide) is left out.
ggpairs(
  log2(pairs[, 2:ncol(pairs)]),
  lower = list(continuous = wrap("points", alpha = 0.1, size = 0.1))
) +
  theme_bw()
# Per-peptide summary of the log2 intensities.
# Note: the `sd` column holds the standard deviation divided by the `mean`
# column computed just before it, i.e. a relative spread rather than the raw
# standard deviation.
meanSD <- pepTidy %>%
  group_by(peptide) %>%
  summarise(
    mean = mean(log2(intensity), na.rm = TRUE),
    sd = sd(log2(intensity), na.rm = TRUE) / mean
  )

# Relative spread against mean, with a quadratic trend line.
ggplot(meanSD, aes(x = mean, y = sd)) +
  geom_point() +
  geom_smooth(method = lm, formula = y ~ poly(x, 2))

# The same quadratic fit kept as a model object.
model <- lm(sd ~ poly(mean, degree = 2), data = meanSD)
# Normalise every cell to its own median: each intensity is divided by the
# median intensity of its cell (NAs ignored when taking the median).
pepTidy <- pepTidy %>%
  group_by(cell) %>%
  mutate(intensity = intensity / median(intensity, na.rm = TRUE)) %>%
  # Drop the grouping again so pepTidy is not left grouped by `cell` for the
  # statements that follow.
  ungroup()

# Density of the log2 intensities after per-cell normalisation.
ggplot(pepTidy, aes(x = log2(intensity), colour = cell)) +
  geom_density() +
  theme_bw()

# Wide table of the control measurements: one row per peptide, one column
# per control cell.
pairs <- pepTidy %>%
  filter(condition == "ctrl") %>%
  select(cell, peptide, intensity) %>%
  spread(key = cell, value = intensity)

# Pairwise scatter matrix of the log2 intensities; the first column
# (peptide) is left out.
ggpairs(
  log2(pairs[, 2:ncol(pairs)]),
  lower = list(continuous = wrap("points", alpha = 0.1, size = 0.1))
) +
  theme_bw()
# Normalise every peptide within its run: each intensity is divided by the
# median intensity of that peptide in that run (NAs ignored when taking the
# median). The grouping is removed afterwards.
pepTidy <- pepTidy %>%
  group_by(run, peptide) %>%
  mutate(intensity = intensity / median(intensity, na.rm = TRUE)) %>%
  ungroup()

# Density of the log2 intensities after per-peptide normalisation.
ggplot(pepTidy, aes(x = log2(intensity), colour = cell)) +
  geom_density() +
  theme_bw()

# Wide table of the control measurements: one row per peptide, one column
# per control cell.
pairs <- pepTidy %>%
  filter(condition == "ctrl") %>%
  select(cell, peptide, intensity) %>%
  spread(key = cell, value = intensity)

# Pairwise scatter matrix of the log2 intensities; the first column
# (peptide) is left out.
ggpairs(
  log2(pairs[, 2:ncol(pairs)]),
  lower = list(continuous = wrap("points", alpha = 0.1, size = 0.1))
) +
  theme_bw()
There are
# Show the rows whose absolute log2 intensity is below twice the standard
# deviation of all log2 intensities in pepTidy. The result is only printed,
# not assigned, so pepTidy itself is left unchanged.
filter(
  pepTidy,
  abs(log2(intensity)) < 2 * sd(log2(pepTidy$intensity), na.rm = TRUE)
)
## # A tibble: 195,387 x 7
## cell run label condition peptide Uniprot intensity
## <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 ctrl_126_… ctrl 126 ctrl yGkDATNVGDEGGFAPNILE… Q7SZ25 0.682
## 2 ctrl_126_… ctrl 126 ctrl aWVWNTYADYADELPkPELL… O42193 1.41
## 3 ctrl_126_… ctrl 126 ctrl lSGVSLSSDAFFPFkDNLER… Q6ING0 1.06
## 4 ctrl_126_… ctrl 126 ctrl lSGVSLSSDAFFPFkDNLER… Q6ING0 0.926
## 5 ctrl_126_… ctrl 126 ctrl iVATTLNTPELFDEWRDNVk… Q7ZTK9 0.956
## 6 ctrl_126_… ctrl 126 ctrl iLSEENSDFSVNLFNQLSTE… Q00387 0.987
## 7 ctrl_126_… ctrl 126 ctrl eTVVEVPQVTWEDIGGLEDV… P23787 0.869
## 8 ctrl_126_… ctrl 126 ctrl vLAIAVETDYSFPLADkVk_a Q8AVI3 1.09
## 9 ctrl_126_… ctrl 126 ctrl aLAYQNPQVGVLENLHAAAY… Q6DE33 0.853
## 10 ctrl_126_… ctrl 126 ctrl nYPVVSIEDPFDQDHWEAWT… Q7SZ25 1.27
## # … with 195,377 more rows
# Density of the log2 intensities, one curve per cell.
ggplot(pepTidy, aes(x = log2(intensity), colour = cell)) +
  geom_density() +
  theme_bw()
The standard deviation of all peptides across eggs was calculated, and every peptide that had a value outside of 0.683984
if(
)